Prepare carbon data

Read and compute POC attenuation using SOCA data

Author

Thelma Panaïotis

source("utils.R")

Canyon (ARGO + sat + ML) − Sauzède et al.

Read POC data from Sauzède et al. 2020, built using ML, ARGO and satellite ocean colour observations.

Read data

# List NC files to read
files <- list.files("data/raw/soca/", full.names = TRUE)

# Open one file to get dims
nc <- nc_open(files[1])
lon <- ncvar_get(nc, "longitude")
lat <- ncvar_get(nc, "latitude")
depth <- ncvar_get(nc, "depth")
nc_close(nc)

# Prepare storage for 12 months
poc_mo <- array(NA, c(length(lon), length(lat), length(depth), 12))
# Read all files
for(i in 1:12) {
  # get relevant files
  file <- files[i]
  # open the file and read the data in it
  nc <- nc_open(file)
  poc_mo[,,,i] <- ncvar_get(nc, "poc")
  nc_close(nc)
}

# Plot surface POC in January
image.plot(poc_mo[,,1,1], col = col_poc)

Looks good, compute annual climatology from mensual data.

Compute annual climatology

# Do it parallel on depth
poc_ann <- mclapply(1:length(depth), function(d) {
  # Get one layer
  s_block <- poc_mo[,,d,]
  # Compute annual mean for each pixel
  return(apply(s_block, c(1, 2), mean, na.rm = TRUE))
}, mc.cores = n_cores) %>%
  abind(along = 3)

# Plot surface climatology
image.plot(poc_ann[,,1], col = col_poc)

# Plot 1000 m climatology
image.plot(poc_ann[,,35], col = col_poc)

Convert to dataframe

# To vector and to single column df
df_can <- poc_ann %>% as.vector() %>% as.data.frame() %>% setNames("poc")
# Add lon, lat and depth
df_can$lon <- lon
df_can$lat <- rep(lat, each=length(lon))
df_can$depth <- rep(depth, each=length(lon)*length(lat))
# Convert to tibble
df_can <- df_can %>% as_tibble() %>% select(lon, lat, depth, poc)

# Plot surface map
df_can %>%
  filter(depth == 0) %>%
  ggplot() +
  geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
  geom_tile(aes(x = lon, y = lat, colour = poc, fill = poc)) +
  scale_colour_cmocean(name = "matter", na.value = NA) + 
  scale_fill_cmocean(name = "matter", na.value = NA) + 
  labs(title = "Surface POC") +
  coord_quickmap(expand = 0)

Compute attenuation as Martin’s b

Compute attenuation on a few profiles and generate some plots.

# Drop pixels with only NA and generate unique pixel id
df_can <- df_can %>%
  drop_na(poc) %>%
  mutate(pix_id = paste0(lon, "_", lat), .before = lon)

# Select a few profiles for plotting
# Originally 515000 profiles
sel_pix <- df_can %>%
  filter(depth == 0) %>%
  slice_sample(n = 49)
df_can_sub <- df_can %>%
  filter(pix_id %in% sel_pix$pix_id)

# Find reference depth as the depth with maximum poc
df_can_sub <- df_can_sub %>%
  arrange(pix_id,) %>% 
  group_by(pix_id, lon, lat) %>% 
  mutate(
    poc_max = cummax(poc),
    keep = poc_max == max(poc)
  ) %>% 
  filter(keep) %>% 
  mutate(z0 = min(depth)) %>% 
  ungroup() %>% 
  select(-c(poc_max, keep))

# Perform linear regression on log-transformed data
argo_regs <- df_can_sub %>%
  #filter(depth >= z0) %>%
  rename(z = depth) %>%
  arrange(pix_id) %>%
  nest(data = c(poc, z, z0)) %>%
  mutate(
    fit = map(data, ~lm(log(poc) ~ log(z/z0), data = .x)),
    glance = map(fit, glance),
    tidied = map(fit, tidy)
  )

# glance contains all summary of fits
argo_summ <- argo_regs %>%
  select(pix_id, lon, lat, glance) %>%
  unnest(glance)

# tidied contains coefficients
argo_coef <- argo_regs %>%
  select(pix_id, lon, lat, tidied) %>%
  unnest(tidied) %>%
  # keep only estimates of slope and intercept
  select(-c(std.error, statistic, p.value)) %>%
  mutate(
    term = case_when(
      term == "(Intercept)" ~ "intercept",
      .default = "b"
    )) %>%
  # 2 rows (intercept + slope) for each profile, reshape to make it 1 row
  pivot_wider(names_from = term, values_from = estimate)

# Let’s join both together
argo_coef <- argo_coef %>% left_join(argo_summ, by = join_by(pix_id, lon, lat))

# Make b positive
argo_coef <- argo_coef %>% mutate(b = -b)

# Plot a few fits
# Generate data
b_curve <- df_can_sub %>% 
  select(pix_id, lon, lat, z = depth, z0) %>% 
  left_join(
    argo_coef %>% select(pix_id, intercept, b, adj.r.squared), 
    by = join_by(pix_id)
  ) %>% 
   mutate(poc = exp((-b) * log(z/z0) + intercept))
# Plo it
ggplot() +
  geom_path(data = b_curve, aes(x = poc, y = -z, colour = b), linewidth = 1) +
  geom_point(data = df_can_sub, aes(x = poc, y = -depth), size = 0.5) +
  scale_colour_viridis_c() +
  facet_wrap(~pix_id)

Look at trends of b value in 1/5 of the whole dataset.

# Get 1/5 pix in both lon and lat to plot map of b values
df_can_map <- df_can %>% 
  #select(pix_id, lon, lat) %>% 
  #unique() %>% 
  arrange(lat) %>% 
  group_by(lat) %>% 
  mutate(lat_id = cur_group_id()) %>% 
  ungroup() %>% 
  arrange(lon) %>% 
  group_by(lon) %>% 
  mutate(lon_id = cur_group_id()) %>% 
  ungroup() %>% 
  filter(lon_id %% 5 == 0) %>% 
  filter(lat_id %% 5 == 0) %>% 
  select(-c(lat_id, lon_id))

# Compute reference depth
df_can_map <- df_can_map %>%
  filter(depth > 0) %>% # reference depth cannot be 0
  arrange(pix_id,) %>% 
  group_by(pix_id, lon, lat) %>% 
  mutate(
    poc_max = cummax(poc),
    keep = poc_max == max(poc)
  ) %>% 
  filter(keep) %>% 
  mutate(z0 = min(depth)) %>% 
  ungroup() %>% 
  select(-c(poc_max, keep))

# Perform linear regression on log-transformed data
argo_map_regs <- df_can_map %>%
  #filter(depth >= z0) %>%
  rename(z = depth) %>%
  arrange(pix_id) %>%
  nest(data = c(poc, z, z0)) %>%
  mutate(
    fit = map(data, ~lm(log(poc) ~ log(z/z0), data = .x)),
    glance = map(fit, glance),
    tidied = map(fit, tidy)
  )

# glance contains all summary of fits
argo_map_summ <- argo_map_regs %>%
  select(pix_id, lon, lat, glance) %>%
  unnest(glance)

# tidied contains coefficients
argo_map_coef <- argo_map_regs %>%
  select(pix_id, lon, lat, tidied) %>%
  unnest(tidied) %>%
  # keep only estimates of slope and intercept
  select(-c(std.error, statistic, p.value)) %>%
  mutate(
    term = case_when(
      term == "(Intercept)" ~ "intercept",
      .default = "b"
    )) %>%
  # 2 rows (intercept + slope) for each profile, reshape to make it 1 row
  pivot_wider(names_from = term, values_from = estimate)


# Let’s join both together
argo_map_coef <- argo_map_coef %>% left_join(argo_map_summ, by = join_by(pix_id, lon, lat))

# Make b positive
argo_map_coef <- argo_map_coef %>% mutate(b = -b)

# Plot map of b value
argo_map_coef %>% 
  ggplot() + 
  geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
  geom_tile(aes(x = lon, y = lat, fill = b, colour = b)) +
  scale_fill_viridis_c() +
  scale_colour_viridis_c() +
  coord_quickmap(expand = 0)

Compute attenuation for locations of UVP profiles.

# Load UVP data
load("data/01.uvp_profiles.Rdata")

# Match lon and lat to POC dataset
profiles_r <- profiles %>%
  select(lon, lat) %>% 
  mutate(
    lon = roundp(profiles$lon, precision = 0.25, f = floor) + 0.125,
    lat = roundp(profiles$lat, precision = 0.25, f = floor) + 0.125
  ) %>% 
  # Keep only unique pixels
  distinct()

# Get corresponding POC profiles
df_can_uvp <- profiles_r %>% 
  left_join(df_can, by = join_by(lon, lat)) %>% 
  drop_na(poc)

# Compute reference depth
df_can_uvp <- df_can_uvp %>%
  filter(depth > 0) %>% # reference depth cannot be 0
  group_by(lon, lat) %>% 
  mutate(
    poc_max = cummax(poc),
    keep = poc_max == max(poc)
  ) %>% 
  filter(keep) %>% 
  mutate(z0 = min(depth)) %>% 
  ungroup() %>% 
  select(-c(poc_max, keep))

# Perform linear regression on log-transformed data
argo_uvp_regs <- df_can_uvp %>%
  filter(depth >= z0) %>%
  rename(z = depth) %>%
  nest(data = c(poc, z, z0)) %>%
  mutate(
    fit = map(data, ~lm(log(poc) ~ log(z/z0), data = .x)),
    glance = map(fit, glance),
    tidied = map(fit, tidy)
  )

# glance contains all summary of fits
argo_uvp_summ <- argo_uvp_regs %>%
  select(lon, lat, glance) %>%
  unnest(glance)

# tidied contains coefficients
argo_uvp_coef <- argo_uvp_regs %>%
  select(lon, lat, tidied) %>%
  unnest(tidied) %>%
  # keep only estimates of slope and intercept
  select(-c(std.error, statistic, p.value)) %>%
  mutate(
    term = case_when(
      term == "(Intercept)" ~ "intercept",
      .default = "b"
    )) %>%
  # 2 rows (intercept + slope) for each profile, reshape to make it 1 row
  pivot_wider(names_from = term, values_from = estimate)

# Make b positive
argo_uvp_coef <- argo_uvp_coef %>% mutate(b = -b)

# Join UVP with POC b values and drop profiles with no b value
df_c <- profiles_r %>% 
  left_join(argo_uvp_coef %>% select(lon, lat, att = b), by = join_by(lon, lat)) %>% 
  drop_na(att)

# Plot map of profiles
ggplot(df_c) +
  geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
  geom_point(aes(x = lon, y = lat, colour = att), size = 0.5) +
  scale_colour_viridis_c() +
  labs(title = "Map of b values for UVP dataset") +
  coord_quickmap(expand = 0)

# Plot latitudinal trend
ggplot(df_c, aes(x = lat, y = att)) +
  geom_point(size = 0.5) +
  geom_smooth() +
  coord_flip()

# Distribution of b-value
ggplot(df_c) + geom_histogram(aes(x = att), bins = 100)

Save

save(df_c, file = "data/02.carbon_data.Rdata")